%dataset_quarterly_sw.m
%Reads in the data and transforms it into the desired dataset.

function [Y, X, vars, lev,data_]=dataset_quarterly_sw_var(esty1,estq1,esty2,estq2,nlags,shock_extract,use_yields,slope)
%DATASET_QUARTERLY_SW_VAR  Assemble the data matrices used to estimate the VAR.
%
%   [Y, X, vars, lev, data_] = dataset_quarterly_sw_var(esty1, estq1, esty2, ...
%                                  estq2, nlags, shock_extract, use_yields, slope)
%
%   Inputs
%     esty1, estq1   year and quarter of the first observation of the sample
%     esty2, estq2   year and quarter of the last observation of the sample
%                    (both are converted to row indices relative to
%                    daty1/datq1 defined in Step 3)
%     nlags          number of lags to build
%     shock_extract  1      -> term-structure columns first, then macro columns
%                    2 or 3 -> macro columns first, then term-structure columns
%     use_yields     0 -> macro variables only
%                    1 -> 5-year yield, 5-year minus 3-month spread, 3-month yield
%                    2 -> 5-year yield, 5-year minus fed funds spread
%                    3 -> level / slope / curvature factors from factors.xls
%     slope          selects/orders the term-structure columns (ignored when
%                    use_yields == 0):
%                      use_yields 1 or 3: 0 -> columns [1 2], 1 -> columns [2 1]
%                      use_yields 2     : 0 -> column 1,      1 -> column 2
%
%   Outputs
%     Y      (T-nlags) x nvars matrix: the selected data without its first
%            nlags rows
%     X      (T-nlags) x (nvars*nlags) matrix of lags: first lag of every
%            variable, then second lag of every variable, and so on
%     vars   character matrix with one label row per column of Y
%     lev    nvars x 1 vector of ones (prior for the first lag of each variable)
%     data_  the selected T x nvars data before lagging
%
%   Errors are raised (instead of returning with unassigned outputs) when
%   use_yields, slope or shock_extract is not one of the supported values, or
%   when the selected data contain values above 99999.
%
%   Requires var_monte_data (variable var_monte_data) on the path and, for
%   use_yields 1-3, YieldData.xls / factors.xls.

%Step 1: load yield data and select yields specified in use_yields
%-----------------------------------------------------------------
% NOTE(review): the original comments date the yield sheets from 1959:2 and the
% factor sheet from 1959:2 as well, while rows are indexed below relative to
% daty1=1957, datq1=2. Confirm that yields and macro data are row-aligned.

switch use_yields
    case 0
        % macro variables only; no term-structure data needed

    case 1
        yield3m = xlsread('YieldData.xls','CRSPme quarterly','D3:D196');     % 3-month yield in annualized basis; 1959:2 - 2007:3; *** last 3 years need to be updated ***
        yield5y = xlsread('YieldData.xls','CRSPme quarterly','L3:L196');     % 5-year yield in annualized basis; 1959:2 - 2007:3;
        % The 10-year yield ('GSW quarterly', K3:K196) used to be read here but
        % was never used, so the read has been dropped.

        yields = [yield5y (yield5y - yield3m) yield3m]; % long-rate, spread and short-rate
        yieldvars=['    5-year bond yield      '
                   ' Spread (5-year - 3-month) '
                   '     3-month bond yield    '];
        ts_orders = {[1 2], [2 1]};   % {slope == 0, slope == 1}

    case 2
        ffr = xlsread('YieldData.xls','CRSPme quarterly','N3:N196');
        yield5y = xlsread('YieldData.xls','CRSPme quarterly','L3:L196');     % quarter average 5-year yield in annualized basis; 1959:2 - 2007:3;
        % The 10-year yield ('GSW quarterly', K3:K196) used to be read here but
        % was never used, so the read has been dropped.

        yields = [yield5y (yield5y - ffr)]; % long-rate and spread
        yieldvars=['     5-year bond yield     '
                   'Spread (5-year - Fedfunds) '];
        ts_orders = {1, 2};           % {slope == 0, slope == 1}

    case 3
        yields=xlsread('factors.xls','factors_quarterly','F4:H188');     %Diebold-Li factor extraction based on 3-60 months yields
                                                                        %level, slope, curvature; 1959:2 - 2005:2;

        % yields=xlsread('factors.xls','factors_quarterly','B4:D188');     %Diebold-Li factor extraction based on 3-120 months yields from DRA
                                                                          %level, slope, curvature; 1972:1 - 2000:4;
        yieldvars=['           Level           '
                   '           Slope           '
                   '         Curvature         '];
        ts_orders = {[1 2], [2 1]};   % {slope == 0, slope == 1}

    otherwise
        error('dataset_quarterly_sw_var:badUseYields', ...
              'use_yields must be 0, 1, 2 or 3');
end

% Pick the term-structure columns. Previously an unsupported slope left
% ts_select undefined and failed later with an unrelated message.
if use_yields ~= 0
    if slope == 0
        ts_select = ts_orders{1};
    elseif slope == 1
        ts_select = ts_orders{2};
    else
        error('dataset_quarterly_sw_var:badSlope', ...
              'slope must be 0 or 1 when use_yields is 1, 2 or 3');
    end
end

%Step 2: load macro data and select macro variables in VAR
%---------------------------------------------------------

% Functional form of load: no variables are injected into the workspace at
% run time. A non-struct result (ASCII file) is used as the matrix itself.
loaded = load('var_monte_data');
if isstruct(loaded)
    macro = loaded.var_monte_data;
else
    macro = loaded;
end

% Disabled alternative data sources (kept for reference):
%macro=xlsread('macrodata_junk.xls',1,'B7:M166');  %see xls sheet for details on data
%trend=xlsread('trend.xls',1,'C2:E181');
%mmacro=xlsread('macrodata_SW_tfp_hours2.xls',1,'B19:R198');
%macro=xlsread('macrodata_SW_tfp_hours.xls',1,'B19:R198');
%load var_monte_data_ii;
%var_monte_data_new=var_monte_data_ii(:,:,2);
%macro=var_monte_data_new;
%macro=load(var_monte_data.mat);  %see xls sheet for details on data: B15 (B19) for the start of 1959q1 (1960q1)
%macro=xlsread('macrodata_SW_tfp_hours.xls',1,'B19:Q198');  %see xls sheet for details on data: B15 (B19) for the start of 1959q1 (1960q1)
%macro=xlsread('macrodata_SW_tfp_hours.xls',1,'B19:Q210');  %see xls sheet for details on data: B15 (B19) for the start of 1959q1 (1960q1)

macrovars =   ['             TFP           '      %1) TFP
               '          GDP              '      %2) GDP
               '         Consumption       '      %3) real c
               '          Investment       '      %4) real i
               '       Hours               '      %5) Hours
               '     Federal Funds rate    '      %6) fed funds rate
               '         Inflation         '      %7) GDP inflation
               '       Stock(capital)      '      %8) Value of capital
               '       Stock(int.firms)    '      %9) Value of technology(i.e., intermediate firms)
               '        Aggregate stock    '      %10) Total value of firms (capital + Technology)
               '             Conf          '];    %11) Confidence: Tobin q (for the non-confidence case)

% Disabled rescaling / detrending experiments (kept for reference):
  %macro=100*macro;

  %mmacro_f(:,1)=bpass(mmacro(:,1),2,200);
  %mmacro_f(:,2)=bpass(mmacro(:,2),2,200);
  %mmacro_f(:,3)=bpass(mmacro(:,3),2,200);
  %mmacro_f(:,4)=bpass(mmacro(:,4),2,200);
  %mmacro_f(:,7)=bpass(mmacro(:,7),2,200);
  %mmacro_f(:,9)=bpass(mmacro(:,9),2,200);
  %mmacro_f(:,11)=bpass(mmacro(:,11),2,200);
  %mmacro_f(:,14)=bpass(mmacro(:,14),2,200);
  %mmacro_f(:,15)=bpass(mmacro(:,15),2,200);

  %mmacro_t(:,1)=mmacro(:,1)- mmacro_f(:,1);
  %mmacro_t(:,2)=mmacro(:,2)- mmacro_f(:,2);
  %mmacro_t(:,3)=mmacro(:,3)- mmacro_f(:,3);
  %mmacro_t(:,4)=mmacro(:,4)- mmacro_f(:,4);
  %mmacro_t(:,7)=mmacro(:,7)- mmacro_f(:,7);
  %mmacro_t(:,9)=mmacro(:,9)- mmacro_f(:,9);
  %mmacro_t(:,11)=mmacro(:,11)- mmacro_f(:,11);
  %mmacro_t(:,14)=mmacro(:,14)- mmacro_f(:,14);
  %mmacro_t(:,15)=mmacro(:,15)- mmacro_f(:,15);

  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%macro(:,1)=macro(:,1)+mmacro_t(:,7); %TFP
%macro(:,2)=macro(:,2)+mmacro_t(:,1); %Y
%macro(:,3)=macro(:,3)+mmacro_t(:,2); %C
%macro(:,4)=macro(:,4)+mmacro_t(:,3); %INV
%macro(:,5)=macro(:,5)+mmacro_t(:,11); %h
%macro(:,6)=macro(:,6)+mmacro_t(:,4);%i
%macro(:,7)=macro(:,7); %pai
%macro(:,7)=macro(:,7)+ mmacro_t(:,9);%pai
%macro(:,8)=macro(:,8);
%macro(:,9)=macro(:,9)+mmacro_t(:,14); %conf

%%%%%%%%%%%%%%%%%%%%%%%%%%%
%macro(:,1)=exp(macro(:,1))-1+mmacro_t(:,7); %TFP
%macro(:,2)=exp(macro(:,2))-1+mmacro_t(:,1); %Y
%macro(:,3)=exp(macro(:,3))-1+mmacro_t(:,2); %C
%macro(:,4)=exp(macro(:,4))-1+mmacro_t(:,3); %INV
%macro(:,5)=macro(:,5)+mmacro_t(:,11); %h
%macro(:,6)=macro(:,6)+mmacro_t(:,4);%i
%macro(:,7)=macro(:,7); %pai
%macro(:,7)=macro(:,7)+ mmacro_t(:,9);%pai
%macro(:,8)=macro(:,8);
%macro(:,9)=(exp(macro(:,9))-1)+mmacro_t(:,14); %conf

 %%%%%%%%%%%%%
 %macro(:,1)=exp(macro(:,1))-1+trend(1:180,2);
 %macro(:,2)=exp(macro(:,2))-1+trend(1:180,1)+trend(1:180,2);
 %macro(:,3)=exp(macro(:,3))-1+trend(1:180,1)+trend(1:180,2);
 %macro(:,4)=exp(macro(:,4))-1+trend(1:180,1)+trend(1:180,2);
 %macro(:,5)=macro(:,5);
 %macro(:,6)=macro(:,6);
 %macro(:,7)=macro(:,7);
 %macro(:,8)=exp(macro(:,8))-1+trend(1:180,2);
 %macro(:,9)=(exp(macro(:,9))-1);
%%%%%%%%%%%%%

%%%%%%%%%%%%%
%macro(:,1)=exp(macro(:,1))-1;
%macro(:,2)=exp(macro(:,2))-1;
%macro(:,3)=exp(macro(:,3))-1;
%macro(:,4)=exp(macro(:,4))-1;
%macro(:,5)=exp(macro(:,5))-1;
%macro(:,6)=macro(:,6);
%macro(:,7)=macro(:,7);
%%macro(:,8)=exp(macro(:,8))-1;
%macro(:,8)=macro(:,8);
%%macro(:,11)=(exp(macro(:,11))-1);

%%%%%%%%%%%%%

% Columns of macro (rows of macrovars) that enter the VAR.
%macro_select=[1 2 3 4 5 6 7 ]; % var_data= 20  inv & no conf
%macro_select=[1 2 3 5 6 7 11 ]; % var_data= 21  noinv & conf
macro_select=[1 2 3 5 6 7  ]; % var_data= 22  noinv & no conf
%macro_select=[1 2 3 4 5 7 ]; % var_data= 25  inv & no conf &no interest

%macro_select=[1 2 3 4 5 6]; % var_data= 22  noinv & no conf


%Step 3: setup VAR and compute lags
%----------------------------------

%select sample period: convert (year, quarter) into row indices counted from
%the first observation of the data set
daty1=1957;       % First Year of Data Set
datq1=2;       % First Quarter of Data Set
n1 = (esty1-daty1)*4 + ( estq1 - datq1 + 1) ;
n2 = (esty2-daty1)*4 + ( estq2 - datq1 + 1) ;

switch shock_extract
    case 1
        %-----------------------------------------------------
        % data structure: term structure var + macro var
        %-----------------------------------------------------
        if use_yields == 0
            data = macro(n1:n2,macro_select);
            vars = macrovars(macro_select,:);
        else
            data = [yields(n1:n2,ts_select) macro(n1:n2,macro_select)];
            vars = [yieldvars(ts_select,:); macrovars(macro_select,:)];
        end

    case {2, 3}
        %-----------------------------------------------------
        % data structure: macro var(TFP ordered first)
        %                      + term structure var
        %-----------------------------------------------------
        if use_yields == 0
            %data = macro(1:160,macro_select);
            data = macro(n1:n2,macro_select);
            vars = macrovars(macro_select,:);
        else
            data = [macro(n1:n2,macro_select) yields(n1:n2,ts_select)];
            vars = [macrovars(macro_select,:); yieldvars(ts_select,:)];
    %     data=[macro(n1:n2,macro_select(1:3)) ffr(n1:n2) yields(n1:n2,ts_select)]; % for end-of-quarter i-rates
        end

    otherwise
        % Raise instead of display+return: returning here left every output
        % unassigned, which surfaced to the caller as an unrelated error.
        error('dataset_quarterly_sw_var:badShockExtract', ...
              'must select a shock extraction method');
end

% The fifth output used to be set in a single branch only; it is now always
% the selected data prior to lagging.
data_ = data;

%checking for out-of-sample values
if any(data(:) > 99999)
    error('dataset_quarterly_sw_var:outOfRange', 'dataseries out of range');
end

%set prior for first lag of each variable (if estimation with Minnesota
%prior)
[T,nvars]=size(data);
lev=ones(nvars,1);

%compute nlags lags
%gives (T-nlags) x nvars*nlags matrix of lags for all variables
%first lag of all variables first, then second lag of all variables
%and so on...
lag_blocks = cell(1,nlags);
for p=1:nlags
    lag_blocks{p} = data((nlags+1-p):(T-p),:);
end
X = [lag_blocks{:}];   % single concatenation; X is defined even when nlags == 0

%drop the first nlags observations, which are lost through the lagging
Y=data((nlags+1):T,:);